import json
import os
import shutil

import requests
from bs4 import BeautifulSoup

# Output directory for downloaded posters; recreated from scratch on every run.
target_dir = r"D:\image\spider\Scrapy_movies"
if os.path.exists(target_dir):
    shutil.rmtree(target_dir)
os.makedirs(target_dir)
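
# Movie titles are written straight into file paths below, and Windows forbids
# <>:"/\|?* in file names. This small helper (an addition, not part of the
# original script) swaps those characters for underscores before saving.
def sanitize_filename(name: str) -> str:
    invalid = '<>:"/\\|?*'
    return "".join("_" if ch in invalid else ch for ch in name)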


def html_every_page():
    """Fetch the raw HTML of each listing page; range(1) grabs only page 1."""
    htmls = []
    for page in range(1):  # raise the range bound to crawl more pages
        url = f"https://ssr1.scrape.center/page/{page + 1}"
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            print(f"Crawling page {page + 1}...")
            htmls.append(response.text)
        else:
            raise Exception(f"Request failed with status {response.status_code}")
    return htmls
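
# Optional hardening (a sketch, not part of the original script): reuse one
# Session and let urllib3 retry transient 5xx responses instead of failing
# outright on the first error.
#
#     from requests.adapters import HTTPAdapter
#     from urllib3.util.retry import Retry
#
#     session = requests.Session()
#     session.mount("https://", HTTPAdapter(max_retries=Retry(
#         total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])))
#     response = session.get(url, timeout=10)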


def parse_every_html(html):
    """Extract title, poster URL, categories, and score from one listing page."""
    datas = []
    soup = BeautifulSoup(html, "html.parser")
    # Each movie card carries all four classes; matching them together avoids
    # picking up unrelated divs that share only one of the class names.
    articles = soup.select("div.el-card.item.m-t.is-hover-shadow")

    for article in articles:
        img_url = article.find("img").get("src")
        title = article.find("h2").get_text()
        # Each category sits in a <button class="category"><span>...</span></button>.
        category_list = [
            button.find("span").get_text()
            for button in article.find_all("button", class_="category")
        ]
        score = article.select_one(".score").get_text().strip()
        datas.append(
            {
                "title": title,
                "img_url": img_url,
                "categories": category_list,
                "score": score
            }
        )
        # Download the poster; the sanitized title becomes the file name.
        path = os.path.join(target_dir, f"{sanitize_filename(title)}.jpg")
        img_response = requests.get(img_url, timeout=10)
        if img_response.status_code == 200:
            with open(path, "wb") as f:
                f.write(img_response.content)
        else:
            print(f"Poster download failed ({img_response.status_code}): {img_url}")
        print(f"Title: {title}, categories: {category_list}, score: {score}, img_url: {img_url}")
    return datas


if __name__ == "__main__":
    all_datas = []
    htmls = html_every_page()
    for html in htmls:
        # extend (rather than append) keeps the JSON a flat list of movies
        # instead of a list of per-page lists.
        all_datas.extend(parse_every_html(html))
    with open("Scrapy_movie.json", "w", encoding="utf-8") as f:
        json.dump(all_datas, f, ensure_ascii=False, indent=4)
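
# Resulting Scrapy_movie.json shape (values below are placeholders, shown
# only to illustrate the structure produced above):
# [
#     {
#         "title": "...",
#         "img_url": "https://...",
#         "categories": ["...", "..."],
#         "score": "9.0"
#     },
#     ...
# ]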
